# Read in data from DQYDJ website and CPS original data.
if (!require("ipumsr")) stop("Reading IPUMS data into R requires the ipumsr package. It can be installed using the following command: install.packages('ipumsr')")
library(wCorr)
library(tidyr)
library(dplyr)
library(janitor)
library(readr)
library(ggplot2)
library(DescTools)
# Load the IPUMS-CPS extract: parse the DDI (.xml) file, then read the
# microdata from it.
# Alternative extract: "cps_00012.xml"
data_name <- "cps_00021.xml"
file_path <- paste0("../data/", data_name)
raw_2021 <- read_ipums_micro(read_ipums_ddi(file_path))
# Read in data from DQYDJ website and CPS original data.
if (!require("ipumsr")) stop("Reading IPUMS data into R requires the ipumsr package. It can be installed using the following command: install.packages('ipumsr')")
library(wCorr)
library(tidyr)
library(dplyr)
library(janitor)
library(readr)
library(ggplot2)
library(DescTools)
# Directory holding the extract and the generated files.
path_data <- "../data/"
# Extract to load (alternative: "cps_00012.xml").
data_name <- "cps_00021.xml"
# Parse the DDI file first, then hand it to the microdata reader.
raw_2021 <- read_ipums_micro(
  read_ipums_ddi(paste0(path_data, data_name))
)
# Read in data from DQYDJ website and CPS original data.
if (!require("ipumsr")) stop("Reading IPUMS data into R requires the ipumsr package. It can be installed using the following command: install.packages('ipumsr')")
library(wCorr)
library(tidyr)
library(dplyr)
library(janitor)
library(readr)
library(ggplot2)
library(DescTools)
# Where the data lives, and which extract to use
# (alternative: "cps_00012.xml").
path_data <- "../data/"
data_name <- "cps_00021.xml"
# Read the DDI file, then the microdata.
raw_2021 <- paste0(path_data, data_name) %>%
  read_ipums_ddi() %>%
  read_ipums_micro()
# Build the couples sample with a custom methodology (filter criteria taken
# from DQYDJ), attach weighted percentile statistics, and write it to disk.
df <- raw_2021 %>%
  clean_names() %>%
  # Rows must have a non-zero sploc and satisfy the DQYDJ criteria.
  filter(
    sploc != 0,
    marst == 1,
    age %in% 25:65,
    wkswork1 >= 20,
    classwkr %in% 20:28
  ) %>%
  # Within a (serial, sploc, pernum) group both pernum and sploc are constant,
  # so min()/max() return the smaller/larger of the two. Two rows pointing at
  # each other therefore get the same key.
  group_by(serial, sploc, pernum) %>%
  mutate(
    pair = paste0(serial, "_", min(pernum, sploc), "-", max(pernum, sploc))
  ) %>%
  # Keep only keys that appear more than once among the remaining rows.
  group_by(pair) %>%
  filter(n() > 1) %>%
  ungroup() %>%
  # Empirical distribution of incwage, weighted by asecwt.
  arrange(incwage) %>%
  rename(wt = asecwt) %>%
  mutate(
    pdf = wt / sum(wt),
    cdf = cumsum(pdf),
    # Cap at 100 so no percentile is over 100.
    percentile = pmin(ceiling(100 * cdf), 100)
  ) %>%
  # Statistics within each percentile.
  select(wt, pair, incwage, sex, pdf, cdf, percentile) %>%
  group_by(percentile) %>%
  mutate(
    earnings_threshold_percentile = min(incwage),
    pdf_percentile = pdf / sum(pdf),
    mean_incwage_percentile = sum(pdf_percentile * incwage)
  ) %>%
  ungroup()
# Save the sample next to the raw data.
write_csv(df, paste0(path_data, "sample.csv"))
# Read in data from DQYDJ website and CPS original data.
if (!require("ipumsr")) stop("Reading IPUMS data into R requires the ipumsr package. It can be installed using the following command: install.packages('ipumsr')")
library(wCorr)
library(tidyr)
library(dplyr)
library(janitor)
library(readr)
library(ggplot2)
library(DescTools)
# Load the freshly generated sample and the archived one for comparison.
path_data <- "../data/"
df_new <- read_csv(file = paste0(path_data, "sample.csv"))
df_old <- read_csv(file = "../data/archive/[old] sample.csv")
# Open the archived sample in the data viewer.
View(df_old)
# Compare the regenerated sample (df_new) with the archived one (df_old),
# column by column.

# Decide whether two columns hold the same values.
#
# vnew, vold: vectors to compare (NULL when the column does not exist).
# tol:        maximum absolute difference tolerated between numeric values.
#
# Returns a single TRUE/FALSE, never NA. A missing column, a length mismatch,
# or NAs in different positions count as a difference; they are not silently
# recycled or propagated. Numeric columns are compared within `tol`, all
# others exactly.
columns_agree <- function(vnew, vold, tol) {
  if (is.null(vnew) || is.null(vold) || length(vnew) != length(vold)) {
    return(FALSE)
  }
  na_new <- is.na(vnew)
  if (any(na_new != is.na(vold))) {
    return(FALSE)
  }
  keep <- !na_new
  if (is.numeric(vnew) && is.numeric(vold)) {
    # isTRUE() turns a stray NA (e.g. Inf - Inf) into FALSE.
    isTRUE(all(abs(vnew[keep] - vold[keep]) < tol))
  } else {
    isTRUE(all(vnew[keep] == vold[keep]))
  }
}

# Pass 1: for every column print its name, its class, and whether it matches
# within 0.01.
for (col in colnames(df_new)) {
  vnew <- as.vector(df_new[[col]])
  vold <- as.vector(df_old[[col]])
  print(col)
  print(class(vnew))
  print(columns_agree(vnew, vold, tol = 0.01))
}

# Pass 2: list the columns that match within 0.01.
for (col in colnames(df_new)) {
  vnew <- as.vector(df_new[[col]])
  vold <- as.vector(df_old[[col]])
  samevec <- columns_agree(vnew, vold, tol = 0.01)
  if (samevec) {
    print(col)
    print(class(vnew))
  }
}

# Pass 3: list the columns that do NOT match within 0.01.
for (col in colnames(df_new)) {
  vnew <- as.vector(df_new[[col]])
  vold <- as.vector(df_old[[col]])
  samevec <- columns_agree(vnew, vold, tol = 0.01)
  if (!samevec) {
    print(col)
    print(class(vnew))
  }
}

# Pass 4: same as pass 3 with the tighter tolerance 0.001.
for (col in colnames(df_new)) {
  vnew <- as.vector(df_new[[col]])
  vold <- as.vector(df_old[[col]])
  samevec <- columns_agree(vnew, vold, tol = 0.001)
  if (!samevec) {
    print(col)
    print(class(vnew))
  }
}

# Open the regenerated sample in the data viewer.
View(df_new)
# Read in data from DQYDJ website and CPS original data.
if (!require("ipumsr")) stop("Reading IPUMS data into R requires the ipumsr package. It can be installed using the following command: install.packages('ipumsr')")
library(wCorr)
library(tidyr)
library(dplyr)
library(janitor)
library(readr)
library(ggplot2)
library(DescTools)
# Load the IPUMS-CPS extract. read_ipums_ddi() has to be given the DDI (.xml)
# file, not the .dat data file; read_ipums_micro() then reads the microdata
# from the parsed DDI.
data_name <- "cps_00012.xml"
# Read in raw data
path_data <- "../data/"
raw_2021 <- read_ipums_ddi(paste0(path_data, data_name)) %>%
  read_ipums_micro()
# Interactive help lookup for dplyr's mutate().
?mutate
# Debugging pass over the first steps of the couples filter (custom
# methodology, criteria from DQYDJ).

# Filtered rows, grouped by (serial, sploc, pernum).
df <- raw_2021 %>%
  clean_names() %>%
  filter(
    sploc != 0,
    marst == 1,
    age %in% 25:65,
    wkswork1 >= 20,
    classwkr %in% 20:28
  ) %>%
  group_by(serial, sploc, pernum)
# Look at SPLOC in the raw data.
raw_2021[["SPLOC"]]
# Same filter again, this time without the grouping.
df <- raw_2021 %>%
  clean_names() %>%
  filter(
    sploc != 0,
    marst == 1,
    age %in% 25:65,
    wkswork1 >= 20,
    classwkr %in% 20:28
  )
# Look at sploc after filtering.
df[["sploc"]]
# Read in data from DQYDJ website and CPS original data.
if (!require("ipumsr")) stop("Reading IPUMS data into R requires the ipumsr package. It can be installed using the following command: install.packages('ipumsr')")
library(wCorr)
library(tidyr)
library(dplyr)
library(janitor)
library(readr)
library(ggplot2)
library(DescTools)
# CHANGE: IPUMS-CPS data name
# The template placeholder is kept as a comment only; it is not a real file
# and must not be passed to the reader.
# data_name <- "[CPS NAME].xml"
# Extract names elsewhere in this script carry a five-digit number.
data_name <- "cps_00021.xml"
# Read in raw data
path_data <- "../data/"
raw_2021 <- read_ipums_ddi(paste0(path_data, data_name)) %>%
  read_ipums_micro()
# Name of the extract to use for the next read.
data_name <- "cps_00012.xml"
# Couples sample (custom methodology, criteria from DQYDJ) with weighted
# percentiles of incwage.
df <- raw_2021 %>%
  clean_names() %>%
  # Rows must have a non-zero sploc and satisfy the DQYDJ criteria.
  filter(
    sploc != 0,
    marst == 1,
    age %in% 25:65,
    wkswork1 >= 20,
    classwkr %in% 20:28
  ) %>%
  # pernum and sploc are constant inside each (serial, sploc, pernum) group,
  # so min()/max() order the two numbers and both rows of a couple get the
  # same key.
  group_by(serial, sploc, pernum) %>%
  mutate(
    pair = paste0(serial, "_", min(pernum, sploc), "-", max(pernum, sploc))
  ) %>%
  # len = number of remaining rows sharing the key; keep keys seen more than
  # once.
  group_by(pair) %>%
  mutate(len = n()) %>%
  filter(len > 1) %>%
  ungroup() %>%
  # Empirical distribution of incwage, weighted by asecwt.
  arrange(incwage) %>%
  rename(wt = asecwt) %>%
  mutate(
    pdf = wt / sum(wt),
    cdf = cumsum(pdf),
    percentile = ceiling(100 * cdf)
  )
# Load extract cps_00012. data_name is set here, before the read, so the
# read does not depend on whatever name an earlier step left behind.
data_name <- "cps_00012.xml"
path_data <- "../data/"
raw_2021 <- read_ipums_ddi(paste0(path_data, data_name)) %>%
  read_ipums_micro()
# Inspect the YEAR column of the raw data.
raw_2021$YEAR
# Couples sample (custom methodology, criteria from DQYDJ) with weighted
# percentiles of incwage, followed by a look at the year column.
df <- raw_2021 %>%
  clean_names() %>%
  # Rows must have a non-zero sploc and satisfy the DQYDJ criteria.
  filter(
    sploc != 0,
    marst == 1,
    age %in% 25:65,
    wkswork1 >= 20,
    classwkr %in% 20:28
  ) %>%
  # Inside a (serial, sploc, pernum) group pernum and sploc do not vary, so
  # min()/max() simply order the two numbers.
  group_by(serial, sploc, pernum) %>%
  mutate(
    pair = paste0(serial, "_", min(pernum, sploc), "-", max(pernum, sploc))
  ) %>%
  # Count the rows per key and drop keys that occur only once.
  group_by(pair) %>%
  mutate(len = n()) %>%
  filter(len > 1) %>%
  ungroup() %>%
  # Weighted empirical distribution, ordered by incwage.
  arrange(incwage) %>%
  rename(wt = asecwt) %>%
  mutate(
    pdf = wt / sum(wt),
    cdf = cumsum(pdf),
    percentile = ceiling(100 * cdf)
  )
# Inspect the year column of the result.
df[["year"]]
# Read in data from DQYDJ website and CPS original data.
if (!require("ipumsr")) stop("Reading IPUMS data into R requires the ipumsr package. It can be installed using the following command: install.packages('ipumsr')")
library(wCorr)
library(tidyr)
library(dplyr)
library(janitor)
library(readr)
library(ggplot2)
library(DescTools)
# CHANGE: IPUMS-CPS data name
# data_name <- "[CPS NAME].xml"
# Extract names carry a five-digit number, as everywhere else in this script.
data_name <- "cps_00012.xml"
# Read in raw data: parse the DDI (.xml) file, then read the microdata.
path_data <- "../data/"
raw_2021 <- read_ipums_ddi(paste0(path_data, data_name)) %>%
  read_ipums_micro()
# Read in data from DQYDJ website and CPS original data.
if (!require("ipumsr")) stop("Reading IPUMS data into R requires the ipumsr package. It can be installed using the following command: install.packages('ipumsr')")
library(wCorr)
library(tidyr)
library(dplyr)
library(janitor)
library(readr)
library(ggplot2)
library(DescTools)
# CHANGE: set data_name to the DDI (.xml) file of the IPUMS-CPS extract.
# Template value: "[CPS NAME].xml"
data_name <- "cps_00012.xml"
path_data <- "../data/"
# Parse the DDI file, then read the microdata.
raw_2021 <- read_ipums_micro(
  read_ipums_ddi(paste0(path_data, data_name))
)
# Couples sample (custom methodology, criteria from DQYDJ): filter, key the
# couples, compute weighted percentiles and per-percentile statistics, then
# write the result as CSV.
df <- raw_2021 %>%
  clean_names() %>%
  # Non-zero sploc plus the DQYDJ criteria.
  filter(
    sploc != 0,
    marst == 1,
    age %in% 25:65,
    wkswork1 >= 20,
    classwkr %in% 20:28
  ) %>%
  # pernum and sploc are fixed within a (serial, sploc, pernum) group, so
  # min()/max() put the two numbers in ascending order; rows that point at
  # each other end up with the same key.
  group_by(serial, sploc, pernum) %>%
  mutate(
    pair = paste0(serial, "_", min(pernum, sploc), "-", max(pernum, sploc))
  ) %>%
  # Drop keys that occur only once among the remaining rows.
  group_by(pair) %>%
  filter(n() > 1) %>%
  ungroup() %>%
  # Weighted empirical distribution, ordered by incwage.
  arrange(incwage) %>%
  rename(wt = asecwt) %>%
  mutate(
    pdf = wt / sum(wt),
    cdf = cumsum(pdf),
    # Adjust so no percentile is over 100.
    percentile = pmin(ceiling(100 * cdf), 100)
  ) %>%
  # Per-percentile statistics.
  select(wt, pair, incwage, sex, pdf, cdf, percentile) %>%
  group_by(percentile) %>%
  mutate(
    earnings_threshold_percentile = min(incwage),
    pdf_percentile = pdf / sum(pdf),
    mean_incwage_percentile = sum(pdf_percentile * incwage)
  ) %>%
  ungroup()
# Write CSV
write_csv(df, paste0(path_data, "sample.csv"))
# Read in data from DQYDJ website and CPS original data.
if (!require("ipumsr")) stop("Reading IPUMS data into R requires the ipumsr package. It can be installed using the following command: install.packages('ipumsr')")
library(wCorr)
library(tidyr)
library(dplyr)
library(janitor)
library(readr)
library(ggplot2)
library(DescTools)
# CHANGE: name of the IPUMS-CPS DDI (.xml) file to load.
# Template value: "[CPS NAME].xml"
data_name <- "cps_00012.xml"
# Data directory.
path_data <- "../data/"
# Read the DDI file, then the microdata.
raw_2021 <- paste0(path_data, data_name) %>%
  read_ipums_ddi() %>%
  read_ipums_micro()
# Final run of the couples pipeline (custom methodology, criteria from
# DQYDJ), from the raw extract to the CSV on disk.
df <- raw_2021 %>%
  clean_names() %>%
  # Non-zero sploc plus the DQYDJ criteria.
  filter(
    sploc != 0,
    marst == 1,
    age %in% 25:65,
    wkswork1 >= 20,
    classwkr %in% 20:28
  ) %>%
  # Key each row by serial and the ordered (pernum, sploc) pair. The grouping
  # keeps pernum and sploc constant, so min()/max() act on just those two
  # values.
  group_by(serial, sploc, pernum) %>%
  mutate(
    pair = paste0(serial, "_", min(pernum, sploc), "-", max(pernum, sploc))
  ) %>%
  # Keep keys with more than one remaining row.
  group_by(pair) %>%
  filter(n() > 1) %>%
  ungroup() %>%
  # Weighted empirical distribution over incwage.
  arrange(incwage) %>%
  rename(wt = asecwt) %>%
  mutate(
    pdf = wt / sum(wt),
    cdf = cumsum(pdf),
    # Adjust so no percentile is over 100.
    percentile = pmin(ceiling(100 * cdf), 100)
  ) %>%
  # Statistics within percentiles.
  select(wt, pair, incwage, sex, pdf, cdf, percentile) %>%
  group_by(percentile) %>%
  mutate(
    earnings_threshold_percentile = min(incwage),
    pdf_percentile = pdf / sum(pdf),
    mean_incwage_percentile = sum(pdf_percentile * incwage)
  ) %>%
  ungroup()
# Write CSV
write_csv(df, paste0(path_data, "sample.csv"))
